
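/****** Section 1: Prepare stacked panel dataset
1. For each panel (2006, 2008, 2010, and 2016-2020 GSS panels):
	a. keep only the needed variables,
	b. generate the respondent id (panel_id) and panel id variables,
	c. convert the panel from wide to long form (the wave is indexed by year),
2. stack (append) the four long-form panels,
3. and drop observations for which all retained variables are missing ******/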

* Close a log left open by an earlier run that stopped part-way; otherwise
* the log using command below fails because a log is already open.
capture log close

* Ask for SMCL explicitly: the translate command at the end of this script
* reads log_recode.smcl, which would not exist if the default log type were text.
log using log_recode.smcl, replace smcl

/* 2006 panel: wide-form suffixes _1, _2, _3 become years 2006, 2008, 2010 */

* The clear option lets the script start even when unsaved data are in memory;
* without it this first use command aborts.
use "original_dataset/GSS_panel06w123_R6a.dta", clear

* Variable stems to retain, listed in the order they are passed to reshape long
local stems age dateintv degree natarms natcrimy natenvir natfare natrace natsci partyid polviews race income sex letin1a

* Build the wildcard keep list (stem_*) and the reshape stub list (stem_)
local widevars
local stubs
foreach s of local stems {
    local widevars `widevars' `s'_*
    local stubs `stubs' `s'_
}
keep `widevars'

* Respondent id: the digits 100 followed by the row number, stored as a number
generate panel_id = "100" + string(_n)
destring panel_id, replace

* Panel indicator (1 = 2006 panel)
generate panel = 1

* Replace each wave suffix with the matching year
local wave_years 2006 2008 2010
forvalues w = 1/3 {
    local yr : word `w' of `wave_years'
    rename *_`w' *_`yr'
}

* One row per respondent and year; then strip the trailing underscore from the stubs
reshape long `stubs', i(panel_id) j(year)
rename *_ *

save "subset_panel/GSS_2006Panel.dta", replace

clear

/* 2008 panel: wide-form suffixes _1, _2, _3 become years 2008, 2010, 2012 */

use "original_dataset/GSS_panel08w123_R6.dta"

* Variable stems to retain, listed in the order they are passed to reshape long
local stems age dateintv degree natarms natcrimy natenvir natfare natrace natsci partyid polviews race income sex letin1a

* Build the wildcard keep list (stem_*) and the reshape stub list (stem_)
local widevars
local stubs
foreach s of local stems {
    local widevars `widevars' `s'_*
    local stubs `stubs' `s'_
}
keep `widevars'

* Respondent id: the digits 200 followed by the row number, stored as a number
generate panel_id = "200" + string(_n)
destring panel_id, replace

* Panel indicator (2 = 2008 panel)
generate panel = 2

* Replace each wave suffix with the matching year
local wave_years 2008 2010 2012
forvalues w = 1/3 {
    local yr : word `w' of `wave_years'
    rename *_`w' *_`yr'
}

* One row per respondent and year; then strip the trailing underscore from the stubs
reshape long `stubs', i(panel_id) j(year)
rename *_ *

save "subset_panel/GSS_2008Panel.dta", replace

clear


/* 2010 panel: wide-form suffixes _1, _2, _3 become years 2010, 2012, 2014 */

use "original_dataset/GSS_panel2010w123_R6.dta"

* Variable stems to retain, listed in the order they are passed to reshape long
local stems age dateintv degree natarms natcrimy natenvir natfare natrace natsci partyid polviews race income sex letin1a

* Build the wildcard keep list (stem_*) and the reshape stub list (stem_)
local widevars
local stubs
foreach s of local stems {
    local widevars `widevars' `s'_*
    local stubs `stubs' `s'_
}
keep `widevars'

* Respondent id: the digits 300 followed by the row number, stored as a number
generate panel_id = "300" + string(_n)
destring panel_id, replace

* Panel indicator (3 = 2010 panel)
generate panel = 3

* Replace each wave suffix with the matching year
local wave_years 2010 2012 2014
forvalues w = 1/3 {
    local yr : word `w' of `wave_years'
    rename *_`w' *_`yr'
}

* One row per respondent and year; then strip the trailing underscore from the stubs
reshape long `stubs', i(panel_id) j(year)
rename *_ *

save "subset_panel/GSS_2010Panel.dta", replace

clear


/* 2016-2020 panel: wide-form suffixes _1a, _1b, _2 become years 2016, 2018, 2020 */

use "original_dataset/gss2020panel_r1a.dta"

* Variable stems to retain, listed in the order they are passed to reshape long
local stems age dateintv degree natarms natcrimy natenvir natfare natrace natsci partyid polviews race income sex letin1a

* Build the wildcard keep list (stem_*) and the reshape stub list (stem_)
local widevars
local stubs
foreach s of local stems {
    local widevars `widevars' `s'_*
    local stubs `stubs' `s'_
}
keep `widevars'

* Respondent id: the digits 400 followed by the row number, stored as a number
generate panel_id = "400" + string(_n)
destring panel_id, replace

* Panel indicator (4 = 2016-2020 panel)
generate panel = 4

* Replace each wave suffix with the matching year (same order as the suffix list)
local wave_suffixes 1a 1b 2
local wave_years 2016 2018 2020
forvalues k = 1/3 {
    local suf : word `k' of `wave_suffixes'
    local yr : word `k' of `wave_years'
    rename *_`suf' *_`yr'
}

* One row per respondent and year; then strip the trailing underscore from the stubs
reshape long `stubs', i(panel_id) j(year)
rename *_ *

save "subset_panel/GSS_2016_2020Panel.dta", replace

clear

/* Stack the four long-form panels, then drop rows in which every retained
   variable holds one of the missing codes .i .y .d .n .s .a */

append using ///
    "subset_panel/GSS_2006Panel.dta" ///
    "subset_panel/GSS_2008Panel.dta" ///
    "subset_panel/GSS_2010Panel.dta" ///
    "subset_panel/GSS_2016_2020Panel.dta"

* Build the all-missing condition one variable at a time; the leading 1 is a
* neutral starting value for the chain of & terms.
local all_missing 1
foreach v in age dateintv degree natarms natcrimy natenvir natfare natrace natsci partyid polviews race income sex letin1a {
    local all_missing `all_missing' & inlist(`v', .i, .y, .d, .n, .s, .a)
}
drop if `all_missing'

save "GSS_2006_2020AppendedPanels.dta", replace

clear


/****** Section 2: Recode the stacked panel dataset as outlined in the provided codebook ******/

use "GSS_2006_2020AppendedPanels.dta"

* Describe every variable before recoding
codebook _all

/* Interview date: dateintv is split into day (last two digits) and month
   (the remaining leading digits); the wave year supplies the year */
generate dayintv   = mod(dateintv, 100)
generate monthintv = floor(dateintv / 100)

* From here on the wave variable is called yearintv
rename year yearintv

generate stata_intvdate = mdy(monthintv, dayintv, yearintv)
label variable stata_intvdate "interview date in days since Jan. 1. 1960"

* Running row number across the stacked panels
generate combined_id = _n


/* Reverse-code the three-point spending items: 1 and 3 are swapped; 2, any
   other value and all missing codes are carried over unchanged */

foreach dv in natarms natcrimy natenvir natfare natrace natsci {

    codebook `dv'

    * 4 minus the code maps 1 to 3 and 3 to 1; everything else is copied as is
    generate rec_`dv' = cond(inlist(`dv', 1, 3), 4 - `dv', `dv')

    * Cross-check the recode against the original, missing codes included
    tab rec_`dv' `dv', missing
}


codebook letin1a

* Reverse the five-point scale: 6 minus the code swaps 1 with 5 and 2 with 4;
* 3, any other value and all missing codes are copied unchanged
generate rec_letin1a = cond(inlist(letin1a, 1, 2, 4, 5), 6 - letin1a, letin1a)

* Cross-check the recode against the original, missing codes included
tab rec_letin1a letin1a, missing


/* Party identification: fold other party (code 7) into independent (code 3) */

* Work on a clone so the original partyid and its labels stay untouched
clonevar rec_partyid = partyid
recode rec_partyid (7 = 3)

codebook polviews


/* Recode explanatory variables */

// Family income before taxes, collapsed to three groups.
// NOTE(review): the earlier comment said "last fall"; presumably "last year" is meant - confirm against the codebook.

clonevar rec_income = income

// clonevar also copies the value label attached to income, whose text describes
// the original categories. Detach it so that the collapsed codes 1, 2, 3 and 888
// are not displayed or saved with labels that belong to different categories.
label values rec_income .

// .a is mapped to the explicit code 888; the other missing codes stay missing
replace rec_income = 888 if income == .a
// income < 10 never matches missing values, which sort above every number
replace rec_income = 1 if income < 10
replace rec_income = 2 if inlist(income, 10, 11)
replace rec_income = 3 if income == 12

tab income rec_income

// Inspect the remaining explanatory variables, which are used as they are
codebook age
codebook degree
codebook race
codebook sex

* Keep only the analysis variables, in their current dataset order.
* yearintv is written out in full: the wave variable was renamed from year to
* yearintv earlier in this script, so the bare name year only resolved through
* variable-name abbreviation and fails under set varabbrev off.
* NOTE(review): combined_id is not in this list and is therefore dropped here - confirm that is intended.
keep panel_id panel monthintv dayintv yearintv stata_intvdate rec_partyid polviews rec_natarms rec_natcrimy rec_natenvir rec_natfare rec_natrace rec_natsci rec_letin1a rec_income race sex degree age

/**** Save data in .dta format for use in estimating comparison models ****/

save "../GSS_2006_2020RecodedAppendedPanels.dta", replace

/**** Export the dataset as an Excel file for use in the Fortran code that estimates our models ****/

* Variable names go in the first row, raw values are written instead of value
* labels, and missing values are written as -999
export excel using "GSS_combined_panels_subset.xls", firstrow(variables) nolabel replace missing("-999")

log close

* The replace option lets the script be rerun: without it translate stops with
* an error as soon as log_recode.pdf exists from an earlier run.
translate log_recode.smcl log_recode.pdf, replace

